<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta charset="UTF-8">
<title>Sorting and Collations | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="token-normalization.html" title="Normalizing Tokens">
<link rel="prev" href="character-folding.html" title="Unicode Character Folding">
<link rel="next" href="stemming.html" title="Reducing Words to Their Root Form">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- The dataLayer queue must exist before the GTM loader below pushes onto it. -->
    <script>dataLayer = [];</script>
    <!-- No-JavaScript fallback: GTM's ns.html loaded in a hidden, titled iframe over HTTPS. -->
    <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>
      // GTM loader: records the start event on the data layer, then injects the
      // asynchronous gtm.js script before the first script element on the page.
      //   w = window, d = document, s = tag name to create ('script'),
      //   l = name of the data layer global, i = GTM container id.
      (function (w, d, s, l, i) {
        w[l] = w[l] || [];
        w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });
        var f = d.getElementsByTagName(s)[0],
            j = d.createElement(s),
            // Only pass the data layer name to gtm.js when it is not the default one.
            dl = l != 'dataLayer' ? '&l=' + l : '';
        j.async = true;
        // Explicit https: so the loader still resolves when the page is not itself
        // served over http(s) (e.g. a saved copy opened from file://).
        j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
        f.parentNode.insertBefore(j, f);
      })(window, document, 'script', 'dataLayer', 'GTM-58RLH5');
    </script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      // Reuse the dataLayer queue if one already exists on the page; otherwise start an empty one.
      window.dataLayer = window.dataLayer || [];
      // gtag() queues a command by pushing its `arguments` object itself (not an array copy) onto dataLayer.
      function gtag(){dataLayer.push(arguments);}
      // Queue the 'js' command with the current time, then the 'config' command for property UA-12395217-16.
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      // Qualtrics Site Intercept loader (minified vendor snippet; keep the code as shipped).
      // Defines a constructor g(e, h, f, g) and, at the bottom, instantiates it with
      // e = 100, h = "r", f = the cookie name, g = the intercept script URL.
      (function(){var g=function(e,h,f,g){
      // get(a): scans document.cookie for cookie `a` and returns its value, or null when absent.
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      // set(a, c): writes cookie a=c with path=/ and an expiry 6048E5 ms (7 days) from now.
      // NOTE(review): toGMTString() is a deprecated alias of toUTCString(); left untouched in vendor code.
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      // check(): decides whether the intercept script should be loaded.
      //   - Reads cookie f and splits it on ":" into [mode, percent, counter].
      //   - No cookie and e == 100: returns true without writing a cookie.
      //   - No cookie and e != 100: for mode "v", Math.random() is rolled once to turn e into 0 or 100;
      //     then [h, e, 0] is stored in the cookie.
      //   - percent == 100 returns true; mode "v" returns false; mode "r" returns true when
      //     counter % floor(100 / percent) is 0, incrementing and re-saving the counter on every call;
      //     any other mode returns true.
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      // go(): when check() passes, creates a script element pointing at URL g and appends it to
      // document.body (nothing is appended if document.body is not available).
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      // start(): defers go() until the window "load" event, using addEventListener when available
      // and falling back to attachEvent("onload", ...).
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      // Instantiate and start the loader; an exception thrown by the constructor or start() is
      // swallowed by the empty catch.
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="languages.html">Dealing with Human Language</a></span>
»
<span class="breadcrumb-link"><a href="token-normalization.html">Normalizing Tokens</a></span>
»
<span class="breadcrumb-node">Sorting and Collations</span>
</div>
<div class="navheader">
<span class="prev">
<a href="character-folding.html">« Unicode Character Folding</a>
</span>
<span class="next">
<a href="stemming.html">Reducing Words to Their Root Form »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="sorting-collations"></a>Sorting and Collations<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h2>
</div></div></div>
<p>So far in this chapter, we have looked at how to normalize tokens for the
purposes of search.  The final use case to consider in this chapter
is that of string sorting.</p>
<p>In <a class="xref" href="multi-fields.html" title="String Sorting and Multifields">String Sorting and Multifields</a>, we explained that Elasticsearch cannot sort on an
<code class="literal">analyzed</code> string field, and demonstrated how to use <em>multifields</em> to index
the same field once as an <code class="literal">analyzed</code> field for search, and once as a
<code class="literal">not_analyzed</code> field for sorting.</p>
<p>The problem with sorting on an <code class="literal">analyzed</code> field is not that it uses
an analyzer, but that the analyzer tokenizes the string value into
multiple tokens, like a <em>bag of words</em>, and Elasticsearch doesn’t know which
token to use for sorting.</p>
<p>Relying on a <code class="literal">not_analyzed</code> field for sorting is inflexible: it allows
us to sort on only the exact value of the original string.  However, we <em>can</em> use
analyzers to achieve other sort orders, as long as our chosen analyzer always emits only a single token for each string value.</p>
<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="case-insensitive-sorting"></a>Case-Insensitive Sorting<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h3>
</div></div></div>
<p>Imagine that we have three <code class="literal">user</code> documents whose <code class="literal">name</code> fields contain <code class="literal">Boffey</code>,
<code class="literal">BROWN</code>, and <code class="literal">bailey</code>, respectively.  First we will apply the technique
described in <a class="xref" href="multi-fields.html" title="String Sorting and Multifields">String Sorting and Multifields</a> of using a <code class="literal">not_analyzed</code> field for sorting:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index
{
  "mappings": {
    "user": {
      "properties": {
        "name": { <a id="CO148-1"></a><i class="conum" data-value="1"></i>
          "type": "string",
          "fields": {
            "raw": { <a id="CO148-2"></a><i class="conum" data-value="2"></i>
              "type":  "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO148-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">analyzed</code> <code class="literal">name</code> field is used for search.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO148-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">not_analyzed</code> <code class="literal">name.raw</code> field is used for sorting.</p>
</td>
</tr>
</table>
</div>
<p>We can index some documents and try sorting:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index/user/1
{ "name": "Boffey" }

PUT /my_index/user/2
{ "name": "BROWN" }

PUT /my_index/user/3
{ "name": "bailey" }

GET /my_index/user/_search?sort=name.raw</pre>
</div>
<p>The preceding search request would return the documents in this order: <code class="literal">BROWN</code>,
<code class="literal">Boffey</code>, <code class="literal">bailey</code>. This is known as <em>lexicographical order</em> as opposed to
<em>alphabetical order</em>.  Essentially, the bytes used to represent capital
letters have a lower value than the bytes used to represent lowercase letters,
and so the names are sorted with the lowest bytes first.</p>
<p>That may make sense to a computer, but doesn’t make much sense to human beings
who would reasonably expect these names to be sorted alphabetically,
regardless of case.  To achieve this, we need to index each name in a way that
the byte ordering corresponds to the sort order that we want.</p>
<p>In other words, we need an analyzer that will emit a single lowercase token:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "case_insensitive_sort": {
          "tokenizer": "keyword",    <a id="CO149-1"></a><i class="conum" data-value="1"></i>
          "filter":  [ "lowercase" ] <a id="CO149-2"></a><i class="conum" data-value="2"></i>
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO149-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">keyword</code> tokenizer emits the original input string
as a single unchanged token.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO149-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">lowercase</code> token filter lowercases the token.</p>
</td>
</tr>
</table>
</div>
<p>With the <code class="literal">case_insensitive_sort</code> analyzer in place, we can now use it in our
multifield:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index/_mapping/user
{
  "properties": {
    "name": {
      "type": "string",
      "fields": {
        "lower_case_sort": { <a id="CO150-1"></a><i class="conum" data-value="1"></i>
          "type":     "string",
          "analyzer": "case_insensitive_sort"
        }
      }
    }
  }
}

PUT /my_index/user/1
{ "name": "Boffey" }

PUT /my_index/user/2
{ "name": "BROWN" }

PUT /my_index/user/3
{ "name": "bailey" }

GET /my_index/user/_search?sort=name.lower_case_sort</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO150-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">name.lower_case_sort</code> field will provide us with
case-insensitive sorting.</p>
</td>
</tr>
</table>
</div>
<p>The preceding search request returns our documents in the order that we expect:
<code class="literal">bailey</code>, <code class="literal">Boffey</code>, <code class="literal">BROWN</code>.</p>
<p>But is this order correct? It appears to be correct as it matches our
expectations, but our expectations have probably been influenced by the fact
that this book is in English and all of the letters used in our example belong
to the English alphabet.</p>
<p>What if we were to add the German name <em>Böhm</em>?</p>
<p>Now our names would be returned in this order: <code class="literal">bailey</code>, <code class="literal">Boffey</code>, <code class="literal">BROWN</code>,
<code class="literal">Böhm</code>. The reason that <code class="literal">Böhm</code> comes after <code class="literal">BROWN</code> is that these words are
still being sorted by the values of the bytes used to represent them, and an
<code class="literal">r</code> is stored as the byte <code class="literal">0x72</code>, while <code class="literal">ö</code> is stored in UTF-8 as the bytes <code class="literal">0xC3 0xB6</code> and so is
sorted last. The byte value of each character is an accident of history.</p>
<p>Clearly, the default sort order is meaningless for anything other than plain
English. In fact, there is no “right” sort order.  It all depends on the
language you speak.</p>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_differences_between_languages"></a>Differences Between Languages<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h3>
</div></div></div>
<p>Every language has its own sort order, and sometimes even multiple sort
orders. Here are a few examples of how our four names from the previous
section would be sorted in different contexts:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
English:          <code class="literal">bailey</code>, <code class="literal">boffey</code>, <code class="literal">böhm</code>,   <code class="literal">brown</code>
</li>
<li class="listitem">
German:           <code class="literal">bailey</code>, <code class="literal">boffey</code>, <code class="literal">böhm</code>,   <code class="literal">brown</code>
</li>
<li class="listitem">
German phonebook: <code class="literal">bailey</code>, <code class="literal">böhm</code>,   <code class="literal">boffey</code>, <code class="literal">brown</code>
</li>
<li class="listitem">
Swedish:          <code class="literal">bailey</code>, <code class="literal">boffey</code>, <code class="literal">brown</code>,  <code class="literal">böhm</code>
</li>
</ul>
</div>
<div class="note admon">
<div class="icon"></div>
<div class="admon_content">
<p>The reason that the German phonebook sort order places <code class="literal">böhm</code> <em>before</em> <code class="literal">boffey</code>
is that <code class="literal">ö</code> and <code class="literal">oe</code> are considered synonyms when dealing with names and
places, so <code class="literal">böhm</code> is sorted as if it had been written as <code class="literal">boehm</code>.</p>
</div>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="uca"></a>Unicode Collation Algorithm<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h3>
</div></div></div>
<p><em>Collation</em> is the process of sorting text into a predefined order.  The
<em>Unicode Collation Algorithm</em>, or UCA (see
<a href="http://www.unicode.org/reports/tr10/" class="ulink" target="_top"><em>www.unicode.org/reports/tr10</em></a>) defines a
method of sorting strings into the order defined in a <em>Collation Element
Table</em> (usually referred to just as a <em>collation</em>).</p>
<p>The UCA also defines the <em>Default Unicode Collation Element Table</em>, or <em>DUCET</em>,
which defines the default sort order for all Unicode characters, regardless of
language. As you have already seen, there is no single correct sort order, so
DUCET is designed to annoy as few people as possible as seldom as possible,
but it is far from being a panacea for all sorting woes.</p>
<p>Instead, language-specific collations exist for pretty much every language
under the sun. Most use DUCET as their starting point and add a few custom
rules to deal with the peculiarities of each language.</p>
<p>The UCA takes a string and a collation as inputs and outputs a binary sort
key. Sorting a collection of strings according to the specified collation then
becomes a simple comparison of their binary sort keys.</p>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_unicode_sorting"></a>Unicode Sorting<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h3>
</div></div></div>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>The approach described in this section will probably change in a future version of
Elasticsearch. Check the <a class="xref" href="icu-plugin.html" title="Installing the ICU Plug-in"><code class="literal">icu</code> plugin</a> documentation for the
latest information.</p>
</div>
</div>
<p>The <code class="literal">icu_collation</code> token filter defaults to using the DUCET
collation for sorting.  This is already an improvement over the default sort.  To use it,
all we need to do is to create an analyzer that uses the default
<code class="literal">icu_collation</code> filter:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ducet_sort": {
          "tokenizer": "keyword",
          "filter": [ "icu_collation" ] <a id="CO151-1"></a><i class="conum" data-value="1"></i>
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO151-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>Use the default DUCET collation.</p>
</td>
</tr>
</table>
</div>
<p>Typically, the field that we want to sort on is also a field that we want to
search on, so we use the same multifield approach as we used in
<a class="xref" href="sorting-collations.html#case-insensitive-sorting" title="Case-Insensitive Sorting">Case-Insensitive Sorting</a>:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index/_mapping/user
{
  "properties": {
    "name": {
      "type": "string",
      "fields": {
        "sort": {
          "type": "string",
          "analyzer": "ducet_sort"
        }
      }
    }
  }
}</pre>
</div>
<p>With this mapping, the <code class="literal">name.sort</code> field will contain a sort key that will be
used only for sorting.  We haven’t specified a language, so it defaults to
using the <a class="xref" href="sorting-collations.html#uca" title="Unicode Collation Algorithm">DUCET collation</a>.</p>
<p>Now, we can reindex our example docs and test the sorting:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index/user/_bulk
{ "index": { "_id": 1 }}
{ "name": "Boffey" }
{ "index": { "_id": 2 }}
{ "name": "BROWN" }
{ "index": { "_id": 3 }}
{ "name": "bailey" }
{ "index": { "_id": 4 }}
{ "name": "Böhm" }

GET /my_index/user/_search?sort=name.sort</pre>
</div>
<div class="note admon">
<div class="icon"></div>
<div class="admon_content">
<p>Note that the <code class="literal">sort</code> key returned with each document, which in earlier
examples looked like <code class="literal">brown</code> and <code class="literal">böhm</code>, now looks like gobbledygook:
<code class="literal">ᖔ乏昫တ倈⠀\u0001</code>.  The reason is that the <code class="literal">icu_collation</code> filter emits keys
intended only for efficient sorting, not for any other purposes.</p>
</div>
</div>
<p>The preceding search returns our docs in this order: <code class="literal">bailey</code>, <code class="literal">Boffey</code>, <code class="literal">Böhm</code>,
<code class="literal">BROWN</code>. This is already an improvement, as the sort order is now correct for
English and German, but it is still incorrect for German phonebooks and
Swedish. The next step is to customize our mapping for different languages.</p>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_specifying_a_language"></a>Specifying a Language<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <code class="literal">icu_collation</code> filter can be configured to use the collation table for a
specific language, a country-specific version of a language, or some other
subset such as German phonebooks.  This can be done by creating a custom version
of the token filter by using the <code class="literal">language</code>, <code class="literal">country</code>, and <code class="literal">variant</code> parameters
as follows:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
English
</span>
</dt>
<dd>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{ "language": "en" }</pre>
</div>
</dd>
<dt>
<span class="term">
German
</span>
</dt>
<dd>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{ "language": "de" }</pre>
</div>
</dd>
<dt>
<span class="term">
Austrian German
</span>
</dt>
<dd>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{ "language": "de", "country": "AT" }</pre>
</div>
</dd>
<dt>
<span class="term">
German phonebooks
</span>
</dt>
<dd>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">{ "language": "de", "variant": "@collation=phonebook" }</pre>
</div>
</dd>
</dl>
</div>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>You can read more about the locales supported by ICU at:
<a href="http://userguide.icu-project.org/locale" class="ulink" target="_top">http://userguide.icu-project.org/locale</a>.</p>
</div>
</div>
<p>This example shows how to set up the German phonebook sort order:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "german_phonebook": { <a id="CO152-1"></a><i class="conum" data-value="1"></i>
          "type":     "icu_collation",
          "language": "de",
          "country":  "DE",
          "variant":  "@collation=phonebook"
        }
      },
      "analyzer": {
        "german_phonebook": { <a id="CO152-2"></a><i class="conum" data-value="2"></i>
          "tokenizer": "keyword",
          "filter":  [ "german_phonebook" ]
        }
      }
    }
  },
  "mappings": {
    "user": {
      "properties": {
        "name": {
          "type": "string",
          "fields": {
            "sort": { <a id="CO152-3"></a><i class="conum" data-value="3"></i>
              "type":     "string",
              "analyzer": "german_phonebook"
            }
          }
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO152-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>First we create a version of the <code class="literal">icu_collation</code> customized for the German phonebook collation.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO152-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>Then we wrap that up in a custom analyzer.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO152-3"><i class="conum" data-value="3"></i></a></p>
</td>
<td align="left" valign="top">
<p>And we apply it to our <code class="literal">name.sort</code> field.</p>
</td>
</tr>
</table>
</div>
<p>Reindex the data and repeat the same search as we used previously:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index/user/_bulk
{ "index": { "_id": 1 }}
{ "name": "Boffey" }
{ "index": { "_id": 2 }}
{ "name": "BROWN" }
{ "index": { "_id": 3 }}
{ "name": "bailey" }
{ "index": { "_id": 4 }}
{ "name": "Böhm" }

GET /my_index/user/_search?sort=name.sort</pre>
</div>
<p>This now returns our docs in this order: <code class="literal">bailey</code>, <code class="literal">Böhm</code>, <code class="literal">Boffey</code>,  <code class="literal">BROWN</code>.
In the German phonebook collation, <code class="literal">Böhm</code> is the equivalent of <code class="literal">Boehm</code>, which
comes before <code class="literal">Boffey</code>.</p>
<div class="section">
<div class="titlepage"><div><div>
<h4 class="title">
<a id="_multiple_sort_orders"></a>Multiple sort orders<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h4>
</div></div></div>
<p>The same field can support multiple sort orders by using a multifield for
each language:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index/_mapping/user
{
  "properties": {
    "name": {
      "type": "string",
      "fields": {
        "default": {
          "type":     "string",
          "analyzer": "ducet" <a id="CO153-1"></a><i class="conum" data-value="1"></i>
        },
        "french": {
          "type":     "string",
          "analyzer": "french" <a id="CO153-2"></a><i class="conum" data-value="1"></i>
        },
        "german": {
          "type":     "string",
          "analyzer": "german_phonebook" <a id="CO153-3"></a><i class="conum" data-value="1"></i>
        },
        "swedish": {
          "type":     "string",
          "analyzer": "swedish" <a id="CO153-4"></a><i class="conum" data-value="1"></i>
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO153-1"><i class="conum" data-value="1"></i></a><a href="#CO153-2"></a><a href="#CO153-3"></a><a href="#CO153-4"></a></p>
</td>
<td align="left" valign="top">
<p>We would need to create the corresponding analyzers for each of these collations.</p>
</td>
</tr>
</table>
</div>
<p>With this mapping in place, results can be ordered correctly for French,
German, and Swedish users, just by sorting on the <code class="literal">name.french</code>, <code class="literal">name.german</code>,
or <code class="literal">name.swedish</code> fields.  Unsupported languages can fall back to using the
<code class="literal">name.default</code> field, which uses the DUCET sort order.</p>
</div>

</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_customizing_collations"></a>Customizing Collations<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/220_Token_normalization/60_Sorting_and_collations.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <code class="literal">icu_collation</code> token filter takes many more options than just <code class="literal">language</code>,
<code class="literal">country</code>, and <code class="literal">variant</code>,  which can be used to tailor the sorting algorithm.
Options are available that will do the following:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
Ignore diacritics
</li>
<li class="listitem">
Order uppercase first or last, or ignore case
</li>
<li class="listitem">
Take punctuation and whitespace into account or ignore it
</li>
<li class="listitem">
Sort numbers as strings or by their numeric value
</li>
<li class="listitem">
Customize existing collations or define your own custom collations
</li>
</ul>
</div>
<p>Details of these options are beyond the scope of this book, but more information
can be found in the <a href="https://github.com/elasticsearch/elasticsearch-analysis-icu" class="ulink" target="_top">ICU plug-in documentation</a>
and in the <a href="http://userguide.icu-project.org/collation/concepts" class="ulink" target="_top">ICU project collation documentation</a>.</p>
</div>

</div>
<div class="navfooter">
<span class="prev">
<a href="character-folding.html">« Unicode Character Folding</a>
</span>
<span class="next">
<a href="stemming.html">Reducing Words to Their Root Form »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
